import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import plotly.express as px
warnings.filterwarnings("ignore")
# Load the terrorism-incident CSV into a DataFrame. The file is decoded as
# Latin-1 instead of pandas' default UTF-8.
data=pd.read_csv("globalterrorismdb_0718dist.csv" ,encoding='latin1')
# Preview the first rows of the raw table.
data.head()
| eventid | iyear | imonth | iday | approxdate | extended | resolution | country | country_txt | region | ... | addnotes | scite1 | scite2 | scite3 | dbsource | INT_LOG | INT_IDEO | INT_MISC | INT_ANY | related | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 197000000001 | 1970 | 7 | 2 | NaN | 0 | NaN | 58 | Dominican Republic | 2 | ... | NaN | NaN | NaN | NaN | PGIS | 0 | 0 | 0 | 0 | NaN |
| 1 | 197000000002 | 1970 | 0 | 0 | NaN | 0 | NaN | 130 | Mexico | 1 | ... | NaN | NaN | NaN | NaN | PGIS | 0 | 1 | 1 | 1 | NaN |
| 2 | 197001000001 | 1970 | 1 | 0 | NaN | 0 | NaN | 160 | Philippines | 5 | ... | NaN | NaN | NaN | NaN | PGIS | -9 | -9 | 1 | 1 | NaN |
| 3 | 197001000002 | 1970 | 1 | 0 | NaN | 0 | NaN | 78 | Greece | 8 | ... | NaN | NaN | NaN | NaN | PGIS | -9 | -9 | 1 | 1 | NaN |
| 4 | 197001000003 | 1970 | 1 | 0 | NaN | 0 | NaN | 101 | Japan | 4 | ... | NaN | NaN | NaN | NaN | PGIS | -9 | -9 | 1 | 1 | NaN |
5 rows × 135 columns
# Preview the last rows of the raw table.
data.tail()
| eventid | iyear | imonth | iday | approxdate | extended | resolution | country | country_txt | region | ... | addnotes | scite1 | scite2 | scite3 | dbsource | INT_LOG | INT_IDEO | INT_MISC | INT_ANY | related | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 181686 | 201712310022 | 2017 | 12 | 31 | NaN | 0 | NaN | 182 | Somalia | 11 | ... | NaN | "Somalia: Al-Shabaab Militants Attack Army Che... | "Highlights: Somalia Daily Media Highlights 2 ... | "Highlights: Somalia Daily Media Highlights 1 ... | START Primary Collection | 0 | 0 | 0 | 0 | NaN |
| 181687 | 201712310029 | 2017 | 12 | 31 | NaN | 0 | NaN | 200 | Syria | 10 | ... | NaN | "Putin's 'victory' in Syria has turned into a ... | "Two Russian soldiers killed at Hmeymim base i... | "Two Russian servicemen killed in Syria mortar... | START Primary Collection | -9 | -9 | 1 | 1 | NaN |
| 181688 | 201712310030 | 2017 | 12 | 31 | NaN | 0 | NaN | 160 | Philippines | 5 | ... | NaN | "Maguindanao clashes trap tribe members," Phil... | NaN | NaN | START Primary Collection | 0 | 0 | 0 | 0 | NaN |
| 181689 | 201712310031 | 2017 | 12 | 31 | NaN | 0 | NaN | 92 | India | 6 | ... | NaN | "Trader escapes grenade attack in Imphal," Bus... | NaN | NaN | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
| 181690 | 201712310032 | 2017 | 12 | 31 | NaN | 0 | NaN | 160 | Philippines | 5 | ... | NaN | "Security tightened in Cotabato following IED ... | "Security tightened in Cotabato City," Manila ... | NaN | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
5 rows × 135 columns
# (number of rows, number of columns) of the raw table.
data.shape
(181691, 135)
# Storage dtype of every column.
data.dtypes
eventid int64
iyear int64
imonth int64
iday int64
approxdate object
...
INT_LOG int64
INT_IDEO int64
INT_MISC int64
INT_ANY int64
related object
Length: 135, dtype: object
# Row index of the table (a default RangeIndex after read_csv).
data.index
RangeIndex(start=0, stop=181691, step=1)
# Total number of cells (rows x columns).
data.size
24528285
# Column labels of the raw table.
data.columns
Index(['eventid', 'iyear', 'imonth', 'iday', 'approxdate', 'extended',
'resolution', 'country', 'country_txt', 'region',
...
'addnotes', 'scite1', 'scite2', 'scite3', 'dbsource', 'INT_LOG',
'INT_IDEO', 'INT_MISC', 'INT_ANY', 'related'],
dtype='object', length=135)
# Number of missing values in each column.
data.isnull().sum()
eventid 0
iyear 0
imonth 0
iday 0
approxdate 172452
...
INT_LOG 0
INT_IDEO 0
INT_MISC 0
INT_ANY 0
related 156653
Length: 135, dtype: int64
# Summary of the frame: index range, column count, dtype breakdown, memory use.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 181691 entries, 0 to 181690 Columns: 135 entries, eventid to related dtypes: float64(55), int64(22), object(58) memory usage: 187.1+ MB
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns only.
data.describe()
| eventid | iyear | imonth | iday | extended | country | region | latitude | longitude | specificity | ... | ransomamt | ransomamtus | ransompaid | ransompaidus | hostkidoutcome | nreleased | INT_LOG | INT_IDEO | INT_MISC | INT_ANY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.816910e+05 | 181691.000000 | 181691.000000 | 181691.000000 | 181691.000000 | 181691.000000 | 181691.000000 | 177135.000000 | 1.771340e+05 | 181685.000000 | ... | 1.350000e+03 | 5.630000e+02 | 7.740000e+02 | 552.000000 | 10991.000000 | 10400.000000 | 181691.000000 | 181691.000000 | 181691.000000 | 181691.000000 |
| mean | 2.002705e+11 | 2002.638997 | 6.467277 | 15.505644 | 0.045346 | 131.968501 | 7.160938 | 23.498343 | -4.586957e+02 | 1.451452 | ... | 3.172530e+06 | 5.784865e+05 | 7.179437e+05 | 240.378623 | 4.629242 | -29.018269 | -4.543731 | -4.464398 | 0.090010 | -3.945952 |
| std | 1.325957e+09 | 13.259430 | 3.388303 | 8.814045 | 0.208063 | 112.414535 | 2.933408 | 18.569242 | 2.047790e+05 | 0.995430 | ... | 3.021157e+07 | 7.077924e+06 | 1.014392e+07 | 2940.967293 | 2.035360 | 65.720119 | 4.543547 | 4.637152 | 0.568457 | 4.691325 |
| min | 1.970000e+11 | 1970.000000 | 0.000000 | 0.000000 | 0.000000 | 4.000000 | 1.000000 | -53.154613 | -8.618590e+07 | 1.000000 | ... | -9.900000e+01 | -9.900000e+01 | -9.900000e+01 | -99.000000 | 1.000000 | -99.000000 | -9.000000 | -9.000000 | -9.000000 | -9.000000 |
| 25% | 1.991021e+11 | 1991.000000 | 4.000000 | 8.000000 | 0.000000 | 78.000000 | 5.000000 | 11.510046 | 4.545640e+00 | 1.000000 | ... | 0.000000e+00 | 0.000000e+00 | -9.900000e+01 | 0.000000 | 2.000000 | -99.000000 | -9.000000 | -9.000000 | 0.000000 | -9.000000 |
| 50% | 2.009022e+11 | 2009.000000 | 6.000000 | 15.000000 | 0.000000 | 98.000000 | 6.000000 | 31.467463 | 4.324651e+01 | 1.000000 | ... | 1.500000e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000 | 4.000000 | 0.000000 | -9.000000 | -9.000000 | 0.000000 | 0.000000 |
| 75% | 2.014081e+11 | 2014.000000 | 9.000000 | 23.000000 | 0.000000 | 160.000000 | 10.000000 | 34.685087 | 6.871033e+01 | 1.000000 | ... | 4.000000e+05 | 0.000000e+00 | 1.273412e+03 | 0.000000 | 7.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| max | 2.017123e+11 | 2017.000000 | 12.000000 | 31.000000 | 1.000000 | 1004.000000 | 12.000000 | 74.633553 | 1.793667e+02 | 5.000000 | ... | 1.000000e+09 | 1.320000e+08 | 2.750000e+08 | 48000.000000 | 7.000000 | 2769.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
8 rows × 77 columns
# Give the columns used in this analysis short, readable names. Columns that
# are kept under their original name (latitude, longitude, summary, target1,
# motive) need no entry here.
data = data.rename(columns={
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'day',
    'gname': 'Group',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'provstate': 'State',
    'city': 'City',
    'attacktype1_txt': 'Attacktype',
    'targtype1_txt': 'Targettype',
    'weaptype1_txt': 'Weapon',
    'nkill': 'kill',
    'nwound': 'Wound',
})

# Keep only the columns the rest of the notebook works with. The explicit
# .copy() makes `data` an independent frame, so later column assignments
# (fillna, new derived columns) write to it directly instead of to a slice of
# the original 135-column table.
data = data[['Year', 'Month', 'day', 'Country', 'State', 'Region', 'City',
             'latitude', 'longitude', 'Attacktype', 'kill', 'Wound',
             'target1', 'summary', 'Group', 'Targettype', 'Weapon',
             'motive']].copy()
Fill missing values in the `Wound` and `kill` columns with zero, then add the two to get the total casualties per attack
# Treat a missing wounded/killed count as zero so the two can be summed.
for count_col in ('Wound', 'kill'):
    data[count_col] = data[count_col].fillna(0)

# Total casualties per attack = wounded + killed.
data['Casulty'] = data['Wound'].add(data['kill'])

# Preview the trimmed table including the new column.
data.head()
| Year | Month | day | Country | State | Region | City | latitude | longitude | Attacktype | kill | Wound | target1 | summary | Group | Targettype | Weapon | motive | Casulty | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1970 | 7 | 2 | Dominican Republic | NaN | Central America & Caribbean | Santo Domingo | 18.456792 | -69.951164 | Assassination | 1.0 | 0.0 | Julio Guzman | NaN | MANO-D | Private Citizens & Property | Unknown | NaN | 1.0 |
| 1 | 1970 | 0 | 0 | Mexico | Federal | North America | Mexico city | 19.371887 | -99.086624 | Hostage Taking (Kidnapping) | 0.0 | 0.0 | Nadine Chaval, daughter | NaN | 23rd of September Communist League | Government (Diplomatic) | Unknown | NaN | 0.0 |
| 2 | 1970 | 1 | 0 | Philippines | Tarlac | Southeast Asia | Unknown | 15.478598 | 120.599741 | Assassination | 1.0 | 0.0 | Employee | NaN | Unknown | Journalists & Media | Unknown | NaN | 1.0 |
| 3 | 1970 | 1 | 0 | Greece | Attica | Western Europe | Athens | 37.997490 | 23.762728 | Bombing/Explosion | 0.0 | 0.0 | U.S. Embassy | NaN | Unknown | Government (Diplomatic) | Explosives | NaN | 0.0 |
| 4 | 1970 | 1 | 0 | Japan | Fukouka | East Asia | Fukouka | 33.580412 | 130.396361 | Facility/Infrastructure Attack | 0.0 | 0.0 | U.S. Consulate | NaN | Unknown | Government (Diplomatic) | Incendiary | NaN | 0.0 |
# World map of attacks: marker colour = number wounded, marker size = total
# casualties.
# Only rows with usable coordinates are plotted. Some rows have no
# latitude/longitude, and the raw longitude column contains at least one
# impossible value (its minimum is about -8.6e7), so anything outside the
# valid geographic range is excluded. `between` is False for NaN, which also
# removes rows with missing coordinates.
has_valid_coords = (
    data['latitude'].between(-90, 90)
    & data['longitude'].between(-180, 180)
)
px.scatter_mapbox(
    data[has_valid_coords],
    lat="latitude",
    lon="longitude",
    color="Wound",
    size="Casulty",
    size_max=20,
    zoom=2,
    mapbox_style="carto-positron",
)
# Count attacks per (year, region) and draw one overlapping (unstacked) area
# per region.
region_by_year = pd.crosstab(data['Year'], data['Region'])
region_by_year.plot.area(stacked=False, figsize=(20, 10))

plt.title('Terrorist Activities By Region In Each Year', fontsize=25)
plt.xlabel("Year", fontsize=20)
plt.ylabel('Number of Attacks', fontsize=20)
plt.show()
# Animated scatter of killed vs. wounded, one frame per year, with points
# coloured and tracked across frames by country.
scatter_options = dict(
    x="kill",
    y="Wound",
    animation_frame="Year",
    animation_group="Country",
    size="Year",
    color="Country",
    hover_name="Country",
    log_x=True,
    size_max=45,
    range_x=[1, 50],
    range_y=[1, 50],
)
px.scatter(data, **scatter_options)
# Number of attacks recorded in each year, ordered chronologically.
years_count = data['Year'].value_counts(dropna=False).sort_index()

# Take the x labels from the same sorted Series that supplies the bar heights.
# Pairing `data['Year'].unique()` (order of first appearance) with the sorted
# counts would mislabel bars whenever the rows are not already in year order.
year = years_count.index.to_numpy()

plt.figure(figsize=(18, 10))
sns.barplot(x=year,
            y=years_count.to_numpy(),
            palette="tab10")
plt.xticks(rotation=50)
plt.xlabel('Attacking Year', fontsize=20)
plt.ylabel('Number of Attacks Each Year', fontsize=20)
plt.title('Attacks In Years', fontsize=25)
plt.show()
# The ten countries with the most recorded attacks (value_counts sorts by
# count, largest first).
country_counts = data['Country'].value_counts()
attack = country_counts.head(10)
attack
Iraq 24636 Pakistan 14368 Afghanistan 12731 India 11960 Colombia 8306 Philippines 6908 Peru 6096 El Salvador 5320 United Kingdom 5235 Turkey 4292 Name: Country, dtype: int64
# The ten named groups with the most recorded attacks.
# The 'Unknown' placeholder is removed by label rather than by skipping the
# first row, so the result stays correct even if 'Unknown' is not the most
# frequent value (and matches how the bar chart of top groups drops it).
data['Group'].value_counts().drop('Unknown', errors='ignore').head(10)
Taliban 7478 Islamic State of Iraq and the Levant (ISIL) 5613 Shining Path (SL) 4555 Farabundo Marti National Liberation Front (FMLN) 3351 Al-Shabaab 3288 New People's Army (NPA) 2772 Irish Republican Army (IRA) 2671 Revolutionary Armed Forces of Colombia (FARC) 2487 Boko Haram 2418 Kurdistan Workers' Party (PKK) 2310 Name: Group, dtype: int64
# Bar chart of the ten countries with the most recorded attacks.
# Compute the counts once and reuse them for both axes.
top_countries = data['Country'].value_counts().head(10)

plt.subplots(figsize=(20, 10))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them as
# positional arguments.
sns.barplot(x=top_countries.index, y=top_countries.values, palette='rocket')
plt.title('Top Countries Affected', fontsize=25)
plt.xlabel('Countries', fontsize=20)
plt.ylabel('Count', fontsize=20)
plt.xticks(rotation=50)
plt.show()
# Total number of people killed in each year.
data1 = data.groupby('Year')[['kill']].sum()

fig, ax4 = plt.subplots(figsize=(20, 10))
data1.plot.bar(alpha=0.7, ax=ax4)

plt.title("People Died Due To Attack", fontsize=25)
plt.xlabel('Year', fontsize=20)
plt.ylabel("Number of killed peope", fontsize=20)
plt.xticks(rotation=50)

# Hide the top and right borders of the plot area.
for border in ("top", "right"):
    ax4.spines[border].set_visible(False)
# Bar chart of the ten city values that occur most often.
# value_counts() already returns the counts in descending order, so no extra
# sort is needed. The previous sort_values('City') also depended on the counts
# column being named 'City', which is no longer true from pandas 2.0 (the
# column is called 'count' and 'City' becomes the index name).
top_cities = data['City'].value_counts().head(10).to_frame()
top_cities.plot(kind='bar', figsize=(20, 10), color='red')
plt.xticks(rotation=50)
plt.xlabel("City", fontsize=20)
plt.ylabel("Number of attack", fontsize=20)
plt.title("Top 10 most effected city", fontsize=25)
plt.show()
# 3-D scatter: attack type against killed and wounded counts, coloured by
# total casualties.
casualty_fig = px.scatter_3d(
    data,
    x='Attacktype',
    y='kill',
    z='Wound',
    color='Casulty',
    title="Number of killed and Wounded",
)
casualty_fig
# Count of attacks per target type, bars ordered from most to least frequent.
plt.subplots(figsize=(20, 10))
# The column is passed as x= by keyword: newer seaborn releases treat the
# first positional argument as `data`, not as the variable to count.
sns.countplot(
    x=data["Targettype"],
    order=data['Targettype'].value_counts().index,
    palette="gist_heat",
    edgecolor=sns.color_palette("mako"),
)
plt.xticks(rotation=90)
# NOTE(review): the axis label and title below talk about the *attack* type,
# but the column plotted is Targettype. Confirm which was intended and align
# either the column or the labels.
plt.xlabel("Attacktype", fontsize=20)
plt.ylabel("count", fontsize=20)
plt.title("Type of attack", fontsize=25)
plt.show()
# Bar chart of the ten named groups with the most recorded attacks; the
# 'Unknown' row is removed by label before taking the top ten.
group_counts = data['Group'].value_counts().to_frame()
top_groups = group_counts.drop('Unknown').head(10)
top_groups.plot.bar(color='yellow', figsize=(20, 10))

plt.title("Top 10 terrorist group attack", fontsize=25)
plt.xlabel("terrorist group name", fontsize=20)
plt.ylabel("Attack number", fontsize=20)
plt.show()
# Ten (group, country) pairs with the highest total number of people killed.
# Rows attributed to the 'Unknown' group are filtered out before grouping;
# dropping the label from the grouped index afterwards raises a KeyError when
# it is absent. The deprecated `axis` argument to groupby is no longer passed
# (rows are grouped by default).
named_groups = data.loc[data['Group'] != 'Unknown', ['Group', 'Country', 'kill']]
data1 = (
    named_groups
    .groupby(['Group', 'Country'])
    .sum()
    .sort_values('kill', ascending=False)
    .head(10)
    .reset_index()
)
data1
| Group | Country | kill | |
|---|---|---|---|
| 0 | Islamic State of Iraq and the Levant (ISIL) | Iraq | 31058.0 |
| 1 | Taliban | Afghanistan | 29269.0 |
| 2 | Boko Haram | Nigeria | 16917.0 |
| 3 | Shining Path (SL) | Peru | 11595.0 |
| 4 | Liberation Tigers of Tamil Eelam (LTTE) | Sri Lanka | 10928.0 |
| 5 | Al-Shabaab | Somalia | 8176.0 |
| 6 | Farabundo Marti National Liberation Front (FMLN) | El Salvador | 8019.0 |
| 7 | Islamic State of Iraq and the Levant (ISIL) | Syria | 6883.0 |
| 8 | Nicaraguan Democratic Force (FDN) | Nicaragua | 6630.0 |
| 9 | Tehrik-i-Taliban Pakistan (TTP) | Pakistan | 6014.0 |
1. The group responsible for the most attacks: Taliban
2. The most affected country, with 24,636 attacks: Iraq
3. The year with the most attacks: 2014
4. The most affected region: Middle East & North Africa
5. The most affected countries: Iraq, followed by Pakistan, Afghanistan and India
6. The cities worst affected by terrorist attacks: Baghdad and Karachi
7. Attack type with the most wounded: Bombing/Explosion
8. Major terrorist group by number of people killed: ISIL